TidyTuesday 2021-07-27
Olympic history from 1948 to 2016 in Brazil

Resumo

Este trabalho foi feito com base nos dados históricos olímpicos modernos (fonte:kaggle), tendo como objetivo realizar uma comparação do brasil com a média olimpica e os maiores medalistas americanos (Estados unidos e Canadá) em relação à participação feminina de 1948 até 2016. Para isso foram feitos agrupamentos por esporte praticado, ano da olímpiada, bem como por número de medalha por gênero. Canadá e Estados Unidos foram escolhidos para comparação por estarem no continente americano, possuírem tamanhos similares ao do Brasil, além do fato de terem participado de todas as edições olímpicas no período estudado.

Tidytuesday

Este trabalho foi feito a partir do tidytuesday do ano de 2021, semana 31. No decorrer dessa seção será mostrado o código utilizado nessa análise e o projeto completo pode ser encontrados no repositório github.

Limpeza de dados

#Mesclando as bases para dar utilidade a coluna "region" da tabela "regions"
olympics <- left_join(tuesdata$regions, tuesdata$olympics, by = c("NOC" = "noc"))

####Limpando os Dados####
#limpando a base de algumas colunas que não serao utilizadas,
#bem como transformando outras em fatores para melhor manipulação
olympics <- olympics |>
  mutate(
    medal = replace_na(medal, "None"),
    sex = factor(sex, levels = c("F", "M")),
    medal = ordered(medal, levels = c("None", "Bronze", "Silver", "Gold")),
    season = factor(season, levels = c("Summer", "Winter")),
    year = factor(year, ordered = TRUE),
    id = factor(id),
    NOC = factor(NOC)
  ) |>
  select(-c(notes, age, height, weight, team))

Variáveis auxiliáres

####Variaveis auxiliares####
#Paises na análise
countrys <- c("CAN", "USA", "BRA")
#Cores das bandeiras dos paises
country_colors <- cbind(
  c("#FFFAFA", "#FF0000"),
  c("#3C3B6E", "#B22234"),
  c("#FFDF00", "#009C3B")
)
olympic_color <- c("#35B2C9", "#FFBF00")
colnames(country_colors) <- countrys

#Lista que armazenará as Imagens geradas
list_fig_sex_sport <- list()
list_fig_sex_year <- list()
list_fig_medal_sport <- list()

# Figuras auxiliares
Olympic_rings <- png::readPNG("fig/Olympic_rings.png") |> 
  rasterGrob(interpolate = TRUE)
flag_canada <- png::readPNG("fig/canada.png")  |> 
  rasterGrob(interpolate = TRUE)
flag_usa <- png::readPNG("fig/usa.png") |> 
  rasterGrob(interpolate = TRUE)
flag_brazil <- png::readPNG("fig/brazil.png") |> 
  rasterGrob(interpolate = TRUE)


flags <- list(flag_canada, flag_usa, flag_brazil)

Análise geral

Olímpiada por ano

sex_per_year <- olympics |>
  filter(season == "Summer" & year >= 1948) |>
  group_by(year) |>
  count(sex) |>
  group_by(year) |>
  mutate(percent = 100 * n / sum(n)) |>
  filter(percent < 100) |>
  ungroup() |>
  dplyr::select(-n) |>
  tidyr::pivot_wider(
    names_from = sex,
    values_from = percent,
    names_prefix = "percent_"
  ) |>
  dplyr::mutate(year = forcats::fct_reorder(year, desc(percent_F))) |>
  tidyr::pivot_longer(cols = c("percent_F", "percent_M"),
                      values_to = "percent") |>
  dplyr::rename("sex" = name) |>
  dplyr::mutate(
    sex = stringr::str_remove(sex, "percent_"),
    sex = forcats::fct_relevel(sex, c("M", "F"))
  ) 



fig_olympic_year <- sex_per_year |>
  ggplot(mapping = aes(percent, year)) +
  geom_col(aes(fill = sex, color = sex), position = "stack") +
  scale_color_manual(values = olympic_color, labs("")) +
  scale_fill_manual(values = olympic_color, labs("")) +
  geom_vline(xintercept = 50,
             linetype = "dashed",
             size = 0.7) +
  hrbrthemes::scale_x_percent(scale = 1) +
  hrbrthemes::theme_ipsum_pub() +
  theme(
    legend.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor  = element_blank(),
    legend.text = element_text(size = 20),
    axis.text.x = element_text(size = 20),
    axis.text.y = element_text(size = 20),
    plot.caption = element_text(size = 20),
    plot.subtitle = element_text(size = 20, vjust = -1)
  ) +
  labs(
    y = "",
    x = "",
    subtitle = "Participation by gender and year",
    caption = "@talesgomes2709 | #tidytuesday | source: kaggle"
  ) +
  ggtitle("")

fig_olympic_year <- ggplotGrob(fig_olympic_year)
  
new_title <- gtable(unit(c(0.8, 6.9, 0.8), "in"), unit(0.8, "in")) |>
  gtable_add_grob(grobs = Olympic_rings, t = 1, l = 1) |>
  gtable_add_grob(textGrob(label = "Total olympic participation from 1948 to 2017",
                           x = unit(0, "npc"), just = "left", gp=gpar(fontsize=25)),
                  t = 1, l = 2) |>
  gtable_add_grob(grobs = Olympic_rings, t = 1, l = 3) |>
  gtable_add_col_space(width = unit(5, "pt"))

fig_olympic_year$grobs[[which(fig_olympic_year$layout$name == "title")]] <- new_title

Olímpiada por esporte

#Filtrando os dados por ano olímpico
sex_per_sport <- olympics |>
  filter(season == "Summer" & year >= 1948) |>
  group_by(sport) |>
  count(sex) |>
  group_by(sport) |>
  mutate(percent = 100 * n / sum(n)) |>
  #filter(percent < 100) |>
  ungroup() |>
  dplyr::select(-n) |>
  tidyr::pivot_wider(
    names_from = sex,
    values_from = percent,
    names_prefix = "percent_"
  ) |>
  dplyr::mutate(sport = forcats::fct_reorder(sport, desc(percent_F))) |>
  tidyr::pivot_longer(cols = c("percent_F", "percent_M"),
                      values_to = "percent") |>
  dplyr::rename("sex" = name) |>
  dplyr::mutate(
    sex = stringr::str_remove(sex, "percent_"),
    sex = forcats::fct_relevel(sex, c("M", "F")),
    percent = replace_na(percent, 0)
  ) 




#participação por olímpiada e gênero
fig_olympic_sport <- sex_per_sport |>
  ggplot(mapping = aes(percent, sport)) +
  geom_col(aes(fill = sex, color = sex), position = "stack") +
  scale_color_manual(values = olympic_color, labs("")) +
  scale_fill_manual(values = olympic_color, labs("")) +
  geom_vline(xintercept = 50,
             linetype = "dashed",
             size = 0.7) +
  hrbrthemes::scale_x_percent(scale = 1) +
  hrbrthemes::theme_ipsum_pub() +
  theme(
    legend.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor  = element_blank(),
    legend.text = element_text(size = 24),
    axis.text.x = element_text(size = 24),
    axis.text.y = element_text(size = 24),
    plot.caption = element_text(size = 28),
    plot.subtitle = element_text(size = 28, vjust = -1)
  ) +
  labs(
    y = "",
    x = "",
    subtitle = "Participation by gender and sport",
    caption = "@talesgomes2709 | #tidytuesday | source: kaggle"
  ) +
  ggtitle("")


fig_olympic_sport <- ggplotGrob(fig_olympic_sport)
  
new_title <- gtable(unit(c(0.9, 9.3, 0.9), "in"), unit(0.9, "in")) |>
  gtable_add_grob(grobs = Olympic_rings, t = 1, l = 1) |>
  gtable_add_grob(textGrob(label = "Total olympic participation from 1948 to 2017",
                           x = unit(0, "npc"), just = "left", gp=gpar(fontsize=34)),
                  t = 1, l = 2) |>
  gtable_add_grob(grobs = Olympic_rings, t = 1, l = 3) |>
  gtable_add_col_space(width = unit(5, "pt"))

fig_olympic_sport$grobs[[which(fig_olympic_sport$layout$name == "title")]] <- new_title

Olímpiada por medalha

#Filtrando por Medalhas e esporte
medal_per_sex_sport <- olympics |>
  filter(season == "Summer" & medal != "None" & year >= 1948) |>
  with_groups(c(sport, sex, event), count, sex) |> 
  with_groups(sport, mutate, percent = 100 * n / sum(n)) |> 
  select(-n) |>
  pivot_wider(
    names_from = sex,
    values_from = percent,
    names_prefix = "percent_"
    ) |>
  mutate(percent_M = replace_na(percent_M, 0),
         percent_F = replace_na(percent_F, 0)) |> 
  select(-event) |> 
  with_groups(sport, summarise, sport,
              percent_F = sum(percent_F),
              percent_M = sum(percent_M)) |> 
  unique() |> 
  pivot_longer(cols = c("percent_F", "percent_M"),
               values_to = "percent") |>
  rename("sex" = name) |>
  mutate(
    sex = str_remove(sex, "percent_"),
    sex = fct_relevel(sex, c("M", "F"))
    )



fig_olympic_medal_sport <- medal_per_sex_sport |>
  ggplot() +
  geom_col(mapping = aes(percent,
                         fct_reorder2(sport, sex, percent, .desc = TRUE),
                         fill = sex, color = sex), position = "stack") +
  scale_color_manual(values = olympic_color, labs("")) +
  scale_fill_manual(values = olympic_color, labs("")) +
  geom_vline(xintercept = 50,
             linetype = "dashed",
             size = 0.7) +
  hrbrthemes::scale_x_percent(scale = 1) +
  hrbrthemes::theme_ipsum_pub() +
  theme(
    legend.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor  = element_blank(),
    panel.grid  = element_blank(),
    legend.text = element_text(size = 24),
    axis.text.x = element_text(size = 24),
    axis.text.y = element_text(size = 24),
    plot.caption = element_text(size = 28),
    plot.subtitle = element_text(size = 28, vjust = -1)
  ) +
  labs(
    y = "",
    x = "",
    subtitle = "Quantity of medal per gender and sport",
    caption = "@talesgomes2709 | #tidytuesday | source: kaggle"
  ) +
  ggtitle("")

     
fig_olympic_medal_sport <- ggplotGrob(fig_olympic_medal_sport)
  
new_title <- gtable(unit(c(0.9, 9.3, 0.9), "in"), unit(0.9, "in")) |>
  gtable_add_grob(grobs = Olympic_rings, t = 1, l = 1) |>
  gtable_add_grob(textGrob(label = "Total olympic participation from 1948 to 2017",
                           x = unit(0, "npc"), just = "left", gp=gpar(fontsize=34)),
                  t = 1, l = 2) |>
  gtable_add_grob(grobs = Olympic_rings, t = 1, l = 3) |>
  gtable_add_col_space(width = unit(5, "pt"))

fig_olympic_medal_sport$grobs[[which(fig_olympic_medal_sport$layout$name == "title")]] <- new_title

Analise dos Paises

Paises por esporte

#Gerando imagens esportes vs gênero
for (i in 1:3) {
  olympics_country <- olympics |>
    filter(NOC == countrys[i])
  country_title <- str_c(
      "Summer Olympics male to famale athletes proportion from 1964 to 2016 in",
      olympics_country$region[1],
      sep = " "
    )
  
 
  country_sex_per_sport <- olympics_country |>
  filter(season == "Summer" & year >= 1948) |>
  group_by(sport) |>
  count(sex) |>
  group_by(sport) |>
  mutate(percent = 100 * n / sum(n)) |>
  ungroup() |>
  dplyr::select(-n) |>
  tidyr::pivot_wider(
    names_from = sex,
    values_from = percent,
    names_prefix = "percent_"
  ) |>
  dplyr::mutate(sport = forcats::fct_reorder(sport, desc(percent_F))) |>
  tidyr::pivot_longer(cols = c("percent_F", "percent_M"),
                      values_to = "percent") |>
  dplyr::rename("sex" = name) |>
  dplyr::mutate(
    sex = stringr::str_remove(sex, "percent_"),
    sex = forcats::fct_relevel(sex, c("M", "F")),
    percent = replace_na(percent, 0)
  ) 

  
   
  #Gerando Lista de figuras para
  fig <- country_sex_per_sport |>
    ggplot(mapping = aes(percent, sport)) +
  geom_col(aes(fill = sex, color = sex), position = "stack") +
  scale_color_manual(values = country_colors[, i], labs("")) +
  scale_fill_manual(values = country_colors[, i], labs("")) +
  geom_vline(xintercept = 50,
             linetype = "dashed",
             size = 0.7) +
  hrbrthemes::scale_x_percent(scale = 1) +
  hrbrthemes::theme_ipsum_pub() +
  theme(
    legend.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor  = element_blank(),
    legend.text = element_text(size = 24),
    axis.text.x = element_text(size = 24),
    axis.text.y = element_text(size = 24),
    plot.caption = element_text(size = 20),
    plot.subtitle = element_text(size = 24, vjust = -1)
  ) +
  labs(
    y = "",
    x = "",
    subtitle = "Participation by gender and sport",
    caption = "@talesgomes2709 | #tidytuesday | source: kaggle"
  ) +
  ggtitle("")

  
  
  fig <- ggplotGrob(fig)
  
  new_title <- gtable(unit(c(0.9, 12.4, 0.9), "in"), unit(0.5, "in")) |>
  gtable_add_grob(grobs = flags[i], t = 1, l = 1) |>
  gtable_add_grob(textGrob(label = country_title,
                           x = unit(0, "npc"), just = "left", gp=gpar(fontsize=24)),
                  t = 1, l = 2) |>
  gtable_add_grob(grobs = flags[i], t = 1, l = 3) |>
  gtable_add_col_space(width = unit(5, "pt"))

  
  fig$grobs[[which(fig$layout$name == "title")]] <- new_title
  
  
  
  list_fig_sex_sport[[i]] <- fig
}

Agrupamento por olímpiadas

A seguir a participação

grid.draw(fig_olympic_year)

grid.draw(fig_olympic_sport)

grid.draw(fig_olympic_medal_sport)

Paises por esporte

grid.draw(list_fig_sex_sport[[1]])

grid.draw(list_fig_sex_sport[[2]])

grid.draw(list_fig_sex_sport[[3]])

Agrupamento por esporte praticado